{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     aardvark\n",
       "1    artichoke\n",
       "2          NaN\n",
       "3      avocado\n",
       "dtype: object"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#1.检测以下一维数组是否为空\n",
    "data=pd.Series(['aardvark', 'artichoke', np.nan, 'avocado'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    False\n",
       "1    False\n",
       "2     True\n",
       "3    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方式一 \n",
    "# data.isna()\n",
    "\n",
    "# 方式二\n",
    "# data.isnull()\n",
    "\n",
    "# 方式三\n",
    "pd.isna(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    NaN\n",
       "2    3.5\n",
       "3    NaN\n",
       "4    7.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.取出非空数据\n",
    "data=pd.Series([1, np.nan, 3.5, np.nan, 7])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "2    3.5\n",
       "4    7.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方式一 \n",
    "# data[data.isnull()==False]\n",
    "\n",
    "# 方式二\n",
    "# data[data.notnull()]\n",
    "\n",
    "# 方式三\n",
    "# data[pd.isnull(data)==False]\n",
    "\n",
    "#方式四\n",
    "data[pd.notnull(data)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  NaN  NaN\n",
       "2  NaN  NaN  NaN\n",
       "3  NaN  6.5  3.0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3.删除二维数组中的全为np.nan的那些行\n",
    "data = pd.DataFrame([[1., 6.5, 3.], [1., np.nan, np.nan],\n",
    "[np.nan, np.nan, np.nan], [np.nan, 6.5, 3.]])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  NaN  NaN\n",
       "3  NaN  6.5  3.0"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#和data.dropna(how=\"all\",axis=0)等效\n",
    "data.dropna(how=\"all\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2   3\n",
       "0  1.0  6.5  3.0 NaN\n",
       "1  1.0  NaN  NaN NaN\n",
       "2  NaN  NaN  NaN NaN\n",
       "3  NaN  6.5  3.0 NaN"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4.删除二维数组中的全为np.nan的那些列\n",
    "data = pd.DataFrame([[1., 6.5, 3,np.nan], [1., np.nan, np.nan,np.nan],\n",
    "[np.nan, np.nan, np.nan], [np.nan, 6.5, 3.,np.nan]])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  NaN  NaN\n",
       "2  NaN  NaN  NaN\n",
       "3  NaN  6.5  3.0"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# axis=1表示删除列，0代表删除行，是默认值，可以不写\n",
    "data.dropna(how=\"all\",axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  NaN  NaN\n",
       "2  NaN  NaN  NaN\n",
       "3  NaN  6.5  3.0"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 5.空数据用0补全\n",
    "data = pd.DataFrame([[1., 6.5, 3.], [1., np.nan, np.nan],\n",
    "[np.nan, np.nan, np.nan], [np.nan, 6.5, 3.]])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 方式一 使用0填充NaN位置 inplace=True：修改原数据\n",
    "data.fillna(value=\"0\",inplace=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0    1  2\n",
       "0  1  6.5  3\n",
       "1  1    0  0\n",
       "2  0    0  0\n",
       "3  0  6.5  3"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  NaN  NaN\n",
       "2  NaN  NaN  NaN\n",
       "3  NaN  6.5  3.0"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 6.第1列空数据用0补全,第2列用1补全\n",
    "data = pd.DataFrame([[1., 6.5, 3.], [1., np.nan, np.nan],\n",
    "[np.nan, np.nan, np.nan], [np.nan, 6.5, 3.]])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 第一列空数据用0补全， inplace=True:修改原数据\n",
    "data[0].fillna(0,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  NaN  NaN\n",
       "2  0.0  NaN  NaN\n",
       "3  0.0  6.5  3.0"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[1].fillna(1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2\n",
       "0  1.0  6.5  3.0\n",
       "1  1.0  1.0  NaN\n",
       "2  0.0  1.0  NaN\n",
       "3  0.0  6.5  3.0"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "5  two   4\n",
       "6  two   4"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 7.判断各行是否重复\n",
    "data = pd.DataFrame({'k1': ['one', 'two'] * 3 + ['two'],\n",
    "'k2': [1, 1, 2, 3, 3, 4, 4]})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    False\n",
       "1    False\n",
       "2    False\n",
       "3    False\n",
       "4    False\n",
       "5    False\n",
       "6     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 重复返回True,否则返回False\n",
    "data.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "5  two   4\n",
       "6  two   4"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 8.删除重复的行\n",
    "data = pd.DataFrame({'k1': ['one', 'two'] * 3 + ['two'],\n",
    "'k2': [1, 1, 2, 3, 3, 4, 4]})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 删除重复行，inplace=True:修改原数据\n",
    "data.drop_duplicates(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "5  two   4"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "5  two   4\n",
       "6  two   4"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 9.根据k1列过滤重复项\n",
    "data = pd.DataFrame({'k1': ['one', 'two'] * 3 + ['two'],\n",
    "'k2': [1, 1, 2, 3, 3, 4, 4]})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 根据k1过滤重复项，修改原数据\n",
    "data.drop_duplicates(['k1'],inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 根据k1,k2过滤重复项，修改原数据\n",
    "data.drop_duplicates(['k1','k2'],inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "5  two   4"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 根据k1,k2过滤重复项，修改原数据,保留第一个重复项\n",
    "data.drop_duplicates(['k1','k2'],inplace=True,keep=\"first\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "5  two   4"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 根据k1,k2过滤重复项，修改原数据,保留最后一个重复项\n",
    "data.drop_duplicates(['k1','k2'],inplace=True,keep=\"last\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>k1</th>\n",
       "      <th>k2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>one</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>two</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>one</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>two</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    k1  k2\n",
       "0  one   1\n",
       "1  two   1\n",
       "2  one   2\n",
       "3  two   3\n",
       "4  one   3\n",
       "6  two   4"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>bacon</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pulled pork</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>bacon</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pastrami</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>corned beef</td>\n",
       "      <td>7.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bacon</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pastrami</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>honey ham</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>nova lox</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          food  price\n",
       "0        bacon    4.0\n",
       "1  pulled pork    3.0\n",
       "2        bacon   12.0\n",
       "3     Pastrami    6.0\n",
       "4  corned beef    7.5\n",
       "5        Bacon    8.0\n",
       "6     pastrami    3.0\n",
       "7    honey ham    5.0\n",
       "8     nova lox    6.0"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 10.以下肉类数据源,food都转换为小写,添加一个动物列,而且字符串都是小写\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame\n",
    " \n",
    "data = pd.DataFrame({'food':['bacon','pulled pork','bacon','Pastrami',\n",
    "   'corned beef','Bacon','pastrami','honey ham','nova lox'],\n",
    "     'price':[4,3,12,6,7.5,8,3,5,6]})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          bacon\n",
       "1    pulled pork\n",
       "2          bacon\n",
       "3       pastrami\n",
       "4    corned beef\n",
       "5          bacon\n",
       "6       pastrami\n",
       "7      honey ham\n",
       "8       nova lox\n",
       "Name: food, dtype: object"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将food列的内容全部变成小写\n",
    "data['food'].map(str.lower)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       pig\n",
       "1       pig\n",
       "2       pig\n",
       "3       cow\n",
       "4       cow\n",
       "5       pig\n",
       "6       cow\n",
       "7       pig\n",
       "8    salmon\n",
       "Name: food, dtype: object"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#定义食物和动物的对应关系\n",
    "meat_to_animal = {\n",
    " 'bacon':'pig',\n",
    " 'pulled pork':'pig',\n",
    " 'pastrami':'cow',\n",
    " 'corned beef':'cow',\n",
    " 'honey ham':'pig',\n",
    " 'nova lox':'salmon' } \n",
    " #根据key(食物)找到对应的value(动物)\n",
    "data['food'].map(str.lower).map(meat_to_animal) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "#根据转换成小写的food列的内容找到对应的动物生成一个Series对象作为一个新列添加到data中\n",
    "data['animal'] = data['food'].map(lambda x: meat_to_animal[x.lower()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>food</th>\n",
       "      <th>price</th>\n",
       "      <th>animal</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>bacon</td>\n",
       "      <td>4.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pulled pork</td>\n",
       "      <td>3.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>bacon</td>\n",
       "      <td>12.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pastrami</td>\n",
       "      <td>6.0</td>\n",
       "      <td>cow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>corned beef</td>\n",
       "      <td>7.5</td>\n",
       "      <td>cow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Bacon</td>\n",
       "      <td>8.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pastrami</td>\n",
       "      <td>3.0</td>\n",
       "      <td>cow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>honey ham</td>\n",
       "      <td>5.0</td>\n",
       "      <td>pig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>nova lox</td>\n",
       "      <td>6.0</td>\n",
       "      <td>salmon</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          food  price  animal\n",
       "0        bacon    4.0     pig\n",
       "1  pulled pork    3.0     pig\n",
       "2        bacon   12.0     pig\n",
       "3     Pastrami    6.0     cow\n",
       "4  corned beef    7.5     cow\n",
       "5        Bacon    8.0     pig\n",
       "6     pastrami    3.0     cow\n",
       "7    honey ham    5.0     pig\n",
       "8     nova lox    6.0  salmon"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       1.0\n",
       "1    -999.0\n",
       "2       2.0\n",
       "3    -999.0\n",
       "4   -1000.0\n",
       "5       3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 11.替换 -999.0  为NaN\n",
    "data = pd.Series([1., -999., 2., -999., -1000., 3.])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将-999替换为NaN并且修改原数据\n",
    "data.replace(-999.,np.nan,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       1.0\n",
       "1       NaN\n",
       "2       2.0\n",
       "3       NaN\n",
       "4   -1000.0\n",
       "5       3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       1.0\n",
       "1    -999.0\n",
       "2       2.0\n",
       "3    -999.0\n",
       "4   -1000.0\n",
       "5       3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 12.同时替换-999.0 和-1000为[NaN,0]\n",
    "data = pd.Series([1., -999., 2., -999., -1000., 3.])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "#方式一\n",
    "# data.replace([-999,-1000],[np.nan,0],inplace=True)\n",
    "\n",
    "# 方式二\n",
    "data.replace({-999:np.nan,-1000:0},inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    NaN\n",
       "2    2.0\n",
       "3    NaN\n",
       "4    0.0\n",
       "5    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "New York    8    9     10    11"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 13.索引名都转换为大写\n",
    "data = pd.DataFrame(np.arange(12).reshape((3, 4)),index=['Ohio', 'Colorado', 'New York'],columns=['one', 'two', 'three', 'four'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>OHIO</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLORADO</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NEW YORK</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "OHIO        0    1      2     3\n",
       "COLORADO    4    5      6     7\n",
       "NEW YORK    8    9     10    11"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将行索引全部变成大写\n",
    "data.index=data.index.map(lambda x:x.upper())\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['one', 'two', 'three', 'four'], dtype='object')"
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ONE</th>\n",
       "      <th>TWO</th>\n",
       "      <th>THREE</th>\n",
       "      <th>FOUR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>OHIO</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLORADO</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NEW YORK</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          ONE  TWO  THREE  FOUR\n",
       "OHIO        0    1      2     3\n",
       "COLORADO    4    5      6     7\n",
       "NEW YORK    8    9     10    11"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将列索引全部变成大写\n",
    "data.columns=data.columns.map(lambda x:x.upper())\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 同时将行索引和列索引都变成大写\n",
    "data.rename(index=str.upper,columns=str.upper,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ONE</th>\n",
       "      <th>TWO</th>\n",
       "      <th>THREE</th>\n",
       "      <th>FOUR</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>OHIO</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>COLORADO</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NEW YORK</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          ONE  TWO  THREE  FOUR\n",
       "OHIO        0    1      2     3\n",
       "COLORADO    4    5      6     7\n",
       "NEW YORK    8    9     10    11"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 14.将这些数据划分为“18到25”、“26到35”、“35到60”以及“60以上”几个范围/面元\n",
    "ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]\n",
    "ages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[少年, 少年, 少年, 青年, 少年, ..., 青年, NaN, 中年, 中年, 青年]\n",
       "Length: 12\n",
       "Categories (3, object): [少年 < 青年 < 中年]"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data=pd.cut(ages,[18,25,35,60],labels=['少年','青年','中年'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "少年    5\n",
       "青年    3\n",
       "中年    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 统计每个范围内数据的个数\n",
    "data.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.128067</td>\n",
       "      <td>0.842971</td>\n",
       "      <td>-1.216855</td>\n",
       "      <td>-1.041873</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.440124</td>\n",
       "      <td>0.244619</td>\n",
       "      <td>-0.491741</td>\n",
       "      <td>-0.713177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.431855</td>\n",
       "      <td>-0.246364</td>\n",
       "      <td>1.348252</td>\n",
       "      <td>-0.778260</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.551191</td>\n",
       "      <td>0.046024</td>\n",
       "      <td>-2.031817</td>\n",
       "      <td>0.386658</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.624102</td>\n",
       "      <td>-0.618202</td>\n",
       "      <td>0.871852</td>\n",
       "      <td>-0.747220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.345315</td>\n",
       "      <td>1.235030</td>\n",
       "      <td>-1.530973</td>\n",
       "      <td>-0.036239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.206417</td>\n",
       "      <td>0.994307</td>\n",
       "      <td>-0.015177</td>\n",
       "      <td>0.853060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.488751</td>\n",
       "      <td>0.104028</td>\n",
       "      <td>1.118461</td>\n",
       "      <td>-0.450901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1.248939</td>\n",
       "      <td>1.101031</td>\n",
       "      <td>0.097533</td>\n",
       "      <td>-2.322026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.385668</td>\n",
       "      <td>-0.568774</td>\n",
       "      <td>0.146318</td>\n",
       "      <td>-0.746534</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>-0.014503</td>\n",
       "      <td>-1.083427</td>\n",
       "      <td>-1.874361</td>\n",
       "      <td>0.473122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>-1.443512</td>\n",
       "      <td>-0.830428</td>\n",
       "      <td>-1.513184</td>\n",
       "      <td>0.246885</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>-0.160384</td>\n",
       "      <td>0.212254</td>\n",
       "      <td>0.286489</td>\n",
       "      <td>1.063606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>-0.039636</td>\n",
       "      <td>1.168634</td>\n",
       "      <td>-0.702524</td>\n",
       "      <td>-0.120451</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>-0.646122</td>\n",
       "      <td>0.494015</td>\n",
       "      <td>0.195474</td>\n",
       "      <td>-0.954135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.049675</td>\n",
       "      <td>-0.021876</td>\n",
       "      <td>0.641295</td>\n",
       "      <td>0.232610</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>-0.848242</td>\n",
       "      <td>-1.002163</td>\n",
       "      <td>0.022188</td>\n",
       "      <td>0.489501</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>-0.174108</td>\n",
       "      <td>0.317851</td>\n",
       "      <td>0.186293</td>\n",
       "      <td>0.780813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0.102143</td>\n",
       "      <td>-0.986552</td>\n",
       "      <td>-1.497464</td>\n",
       "      <td>0.454876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>0.549689</td>\n",
       "      <td>-0.560812</td>\n",
       "      <td>-0.470073</td>\n",
       "      <td>0.379253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1.042087</td>\n",
       "      <td>0.663857</td>\n",
       "      <td>-1.411520</td>\n",
       "      <td>0.048843</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>-0.181309</td>\n",
       "      <td>0.493980</td>\n",
       "      <td>-0.857306</td>\n",
       "      <td>1.200683</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.792836</td>\n",
       "      <td>0.612476</td>\n",
       "      <td>1.546303</td>\n",
       "      <td>1.283875</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>0.566998</td>\n",
       "      <td>0.288824</td>\n",
       "      <td>0.645645</td>\n",
       "      <td>-0.088654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2.434964</td>\n",
       "      <td>-1.685709</td>\n",
       "      <td>-1.590279</td>\n",
       "      <td>0.940456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0.490226</td>\n",
       "      <td>-0.996593</td>\n",
       "      <td>0.477174</td>\n",
       "      <td>2.984096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>-0.456256</td>\n",
       "      <td>1.411051</td>\n",
       "      <td>-0.532595</td>\n",
       "      <td>0.554248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>-0.763645</td>\n",
       "      <td>0.616717</td>\n",
       "      <td>-0.648890</td>\n",
       "      <td>0.023156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>-0.826525</td>\n",
       "      <td>1.685647</td>\n",
       "      <td>-1.401092</td>\n",
       "      <td>1.479993</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>-0.289533</td>\n",
       "      <td>-1.976482</td>\n",
       "      <td>0.940029</td>\n",
       "      <td>-1.153344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>970</th>\n",
       "      <td>-0.733477</td>\n",
       "      <td>-0.970311</td>\n",
       "      <td>-1.143507</td>\n",
       "      <td>1.415041</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>971</th>\n",
       "      <td>0.314647</td>\n",
       "      <td>1.135774</td>\n",
       "      <td>-1.023729</td>\n",
       "      <td>-0.384940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>972</th>\n",
       "      <td>0.396016</td>\n",
       "      <td>-0.851098</td>\n",
       "      <td>0.591913</td>\n",
       "      <td>-1.298495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>973</th>\n",
       "      <td>-0.912107</td>\n",
       "      <td>-3.198639</td>\n",
       "      <td>1.388923</td>\n",
       "      <td>-0.017521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>974</th>\n",
       "      <td>-0.136926</td>\n",
       "      <td>-1.021253</td>\n",
       "      <td>-0.020650</td>\n",
       "      <td>-0.554061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>975</th>\n",
       "      <td>1.593310</td>\n",
       "      <td>-0.037772</td>\n",
       "      <td>-0.326837</td>\n",
       "      <td>0.591723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <td>0.981342</td>\n",
       "      <td>-0.672519</td>\n",
       "      <td>-0.635214</td>\n",
       "      <td>1.240668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>-1.376773</td>\n",
       "      <td>1.045674</td>\n",
       "      <td>-0.090577</td>\n",
       "      <td>-0.263997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>978</th>\n",
       "      <td>0.553375</td>\n",
       "      <td>-0.872318</td>\n",
       "      <td>-1.413758</td>\n",
       "      <td>-0.470395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>979</th>\n",
       "      <td>2.493107</td>\n",
       "      <td>0.184303</td>\n",
       "      <td>0.665441</td>\n",
       "      <td>0.483892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980</th>\n",
       "      <td>-0.279122</td>\n",
       "      <td>-0.625631</td>\n",
       "      <td>0.525534</td>\n",
       "      <td>-0.123210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>981</th>\n",
       "      <td>1.957269</td>\n",
       "      <td>1.291834</td>\n",
       "      <td>-0.259000</td>\n",
       "      <td>0.939095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>982</th>\n",
       "      <td>0.472432</td>\n",
       "      <td>-0.613830</td>\n",
       "      <td>-0.302080</td>\n",
       "      <td>-0.628942</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>983</th>\n",
       "      <td>-0.528522</td>\n",
       "      <td>-0.749633</td>\n",
       "      <td>0.808426</td>\n",
       "      <td>0.489379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>984</th>\n",
       "      <td>1.148341</td>\n",
       "      <td>1.033664</td>\n",
       "      <td>-1.296752</td>\n",
       "      <td>0.081434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>-1.717425</td>\n",
       "      <td>0.997429</td>\n",
       "      <td>-0.760262</td>\n",
       "      <td>-1.156982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>-2.079672</td>\n",
       "      <td>-0.265820</td>\n",
       "      <td>0.819744</td>\n",
       "      <td>-0.005041</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>-1.330597</td>\n",
       "      <td>-1.870904</td>\n",
       "      <td>-1.168310</td>\n",
       "      <td>-0.296418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>988</th>\n",
       "      <td>-0.474866</td>\n",
       "      <td>0.238939</td>\n",
       "      <td>-0.976220</td>\n",
       "      <td>-1.145636</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>989</th>\n",
       "      <td>-0.723039</td>\n",
       "      <td>-1.172288</td>\n",
       "      <td>1.906811</td>\n",
       "      <td>0.109814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>990</th>\n",
       "      <td>2.680837</td>\n",
       "      <td>0.571715</td>\n",
       "      <td>-1.130920</td>\n",
       "      <td>-0.086042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>991</th>\n",
       "      <td>0.855537</td>\n",
       "      <td>-0.754959</td>\n",
       "      <td>-0.115017</td>\n",
       "      <td>-0.304026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>992</th>\n",
       "      <td>0.001279</td>\n",
       "      <td>-1.148484</td>\n",
       "      <td>0.973939</td>\n",
       "      <td>1.387328</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>-0.486874</td>\n",
       "      <td>0.244926</td>\n",
       "      <td>-1.910900</td>\n",
       "      <td>-0.934385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>-0.690705</td>\n",
       "      <td>0.424428</td>\n",
       "      <td>-0.296897</td>\n",
       "      <td>0.125142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>-0.753638</td>\n",
       "      <td>0.367774</td>\n",
       "      <td>0.052298</td>\n",
       "      <td>0.839287</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>-0.633934</td>\n",
       "      <td>-1.551019</td>\n",
       "      <td>0.375492</td>\n",
       "      <td>-1.979739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>-0.120705</td>\n",
       "      <td>0.058847</td>\n",
       "      <td>-1.848418</td>\n",
       "      <td>-0.035780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>0.124240</td>\n",
       "      <td>-0.432654</td>\n",
       "      <td>-0.899063</td>\n",
       "      <td>-0.057893</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>1.855175</td>\n",
       "      <td>-0.593971</td>\n",
       "      <td>0.422161</td>\n",
       "      <td>0.176027</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            0         1         2         3\n",
       "0    1.128067  0.842971 -1.216855 -1.041873\n",
       "1    0.440124  0.244619 -0.491741 -0.713177\n",
       "2   -1.431855 -0.246364  1.348252 -0.778260\n",
       "3   -0.551191  0.046024 -2.031817  0.386658\n",
       "4   -0.624102 -0.618202  0.871852 -0.747220\n",
       "5   -0.345315  1.235030 -1.530973 -0.036239\n",
       "6    1.206417  0.994307 -0.015177  0.853060\n",
       "7    0.488751  0.104028  1.118461 -0.450901\n",
       "8    1.248939  1.101031  0.097533 -2.322026\n",
       "9    0.385668 -0.568774  0.146318 -0.746534\n",
       "10  -0.014503 -1.083427 -1.874361  0.473122\n",
       "11  -1.443512 -0.830428 -1.513184  0.246885\n",
       "12  -0.160384  0.212254  0.286489  1.063606\n",
       "13  -0.039636  1.168634 -0.702524 -0.120451\n",
       "14  -0.646122  0.494015  0.195474 -0.954135\n",
       "15   0.049675 -0.021876  0.641295  0.232610\n",
       "16  -0.848242 -1.002163  0.022188  0.489501\n",
       "17  -0.174108  0.317851  0.186293  0.780813\n",
       "18   0.102143 -0.986552 -1.497464  0.454876\n",
       "19   0.549689 -0.560812 -0.470073  0.379253\n",
       "20   1.042087  0.663857 -1.411520  0.048843\n",
       "21  -0.181309  0.493980 -0.857306  1.200683\n",
       "22   1.792836  0.612476  1.546303  1.283875\n",
       "23   0.566998  0.288824  0.645645 -0.088654\n",
       "24   2.434964 -1.685709 -1.590279  0.940456\n",
       "25   0.490226 -0.996593  0.477174  2.984096\n",
       "26  -0.456256  1.411051 -0.532595  0.554248\n",
       "27  -0.763645  0.616717 -0.648890  0.023156\n",
       "28  -0.826525  1.685647 -1.401092  1.479993\n",
       "29  -0.289533 -1.976482  0.940029 -1.153344\n",
       "..        ...       ...       ...       ...\n",
       "970 -0.733477 -0.970311 -1.143507  1.415041\n",
       "971  0.314647  1.135774 -1.023729 -0.384940\n",
       "972  0.396016 -0.851098  0.591913 -1.298495\n",
       "973 -0.912107 -3.198639  1.388923 -0.017521\n",
       "974 -0.136926 -1.021253 -0.020650 -0.554061\n",
       "975  1.593310 -0.037772 -0.326837  0.591723\n",
       "976  0.981342 -0.672519 -0.635214  1.240668\n",
       "977 -1.376773  1.045674 -0.090577 -0.263997\n",
       "978  0.553375 -0.872318 -1.413758 -0.470395\n",
       "979  2.493107  0.184303  0.665441  0.483892\n",
       "980 -0.279122 -0.625631  0.525534 -0.123210\n",
       "981  1.957269  1.291834 -0.259000  0.939095\n",
       "982  0.472432 -0.613830 -0.302080 -0.628942\n",
       "983 -0.528522 -0.749633  0.808426  0.489379\n",
       "984  1.148341  1.033664 -1.296752  0.081434\n",
       "985 -1.717425  0.997429 -0.760262 -1.156982\n",
       "986 -2.079672 -0.265820  0.819744 -0.005041\n",
       "987 -1.330597 -1.870904 -1.168310 -0.296418\n",
       "988 -0.474866  0.238939 -0.976220 -1.145636\n",
       "989 -0.723039 -1.172288  1.906811  0.109814\n",
       "990  2.680837  0.571715 -1.130920 -0.086042\n",
       "991  0.855537 -0.754959 -0.115017 -0.304026\n",
       "992  0.001279 -1.148484  0.973939  1.387328\n",
       "993 -0.486874  0.244926 -1.910900 -0.934385\n",
       "994 -0.690705  0.424428 -0.296897  0.125142\n",
       "995 -0.753638  0.367774  0.052298  0.839287\n",
       "996 -0.633934 -1.551019  0.375492 -1.979739\n",
       "997 -0.120705  0.058847 -1.848418 -0.035780\n",
       "998  0.124240 -0.432654 -0.899063 -0.057893\n",
       "999  1.855175 -0.593971  0.422161  0.176027\n",
       "\n",
       "[1000 rows x 4 columns]"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 16.打印出以下数组的摘要信息  \n",
    "# np.random.randn(1000, 4):生成1000行4列的数据\n",
    "data = pd.DataFrame(np.random.randn(1000, 4))\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>-0.004281</td>\n",
       "      <td>0.035550</td>\n",
       "      <td>-0.046554</td>\n",
       "      <td>0.013932</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.984266</td>\n",
       "      <td>0.981314</td>\n",
       "      <td>1.034680</td>\n",
       "      <td>0.964870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-2.760050</td>\n",
       "      <td>-3.198639</td>\n",
       "      <td>-3.321282</td>\n",
       "      <td>-2.698521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-0.670170</td>\n",
       "      <td>-0.618564</td>\n",
       "      <td>-0.756039</td>\n",
       "      <td>-0.597694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>-0.015425</td>\n",
       "      <td>0.050883</td>\n",
       "      <td>-0.036330</td>\n",
       "      <td>0.000596</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.624426</td>\n",
       "      <td>0.684983</td>\n",
       "      <td>0.636083</td>\n",
       "      <td>0.615765</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3.233923</td>\n",
       "      <td>3.249986</td>\n",
       "      <td>3.219384</td>\n",
       "      <td>3.375894</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 0            1            2            3\n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000\n",
       "mean     -0.004281     0.035550    -0.046554     0.013932\n",
       "std       0.984266     0.981314     1.034680     0.964870\n",
       "min      -2.760050    -3.198639    -3.321282    -2.698521\n",
       "25%      -0.670170    -0.618564    -0.756039    -0.597694\n",
       "50%      -0.015425     0.050883    -0.036330     0.000596\n",
       "75%       0.624426     0.684983     0.636083     0.615765\n",
       "max       3.233923     3.249986     3.219384     3.375894"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#输出数据的摘要信息\n",
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.762329</td>\n",
       "      <td>0.883034</td>\n",
       "      <td>-0.052997</td>\n",
       "      <td>0.041565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.430549</td>\n",
       "      <td>-1.183729</td>\n",
       "      <td>0.412986</td>\n",
       "      <td>-0.537272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.791836</td>\n",
       "      <td>-1.243000</td>\n",
       "      <td>2.175150</td>\n",
       "      <td>-0.074091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.460999</td>\n",
       "      <td>2.022232</td>\n",
       "      <td>0.130960</td>\n",
       "      <td>0.837618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.573649</td>\n",
       "      <td>0.687350</td>\n",
       "      <td>1.369322</td>\n",
       "      <td>-0.526989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.460106</td>\n",
       "      <td>-0.567063</td>\n",
       "      <td>-0.734919</td>\n",
       "      <td>1.091058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.189055</td>\n",
       "      <td>-0.629686</td>\n",
       "      <td>0.222738</td>\n",
       "      <td>-0.146839</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-1.866358</td>\n",
       "      <td>0.045562</td>\n",
       "      <td>-1.481863</td>\n",
       "      <td>1.402034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-0.743142</td>\n",
       "      <td>-1.016015</td>\n",
       "      <td>1.504146</td>\n",
       "      <td>1.061250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>-1.139457</td>\n",
       "      <td>-0.723509</td>\n",
       "      <td>-0.173658</td>\n",
       "      <td>0.101536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>-0.451404</td>\n",
       "      <td>-0.079858</td>\n",
       "      <td>0.592246</td>\n",
       "      <td>0.175387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>-0.265265</td>\n",
       "      <td>-0.268328</td>\n",
       "      <td>-0.112837</td>\n",
       "      <td>-0.037180</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0.294101</td>\n",
       "      <td>0.325259</td>\n",
       "      <td>-0.027550</td>\n",
       "      <td>0.648257</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>-0.729797</td>\n",
       "      <td>-0.005303</td>\n",
       "      <td>1.482832</td>\n",
       "      <td>0.159528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>-0.007283</td>\n",
       "      <td>1.095223</td>\n",
       "      <td>-0.451293</td>\n",
       "      <td>-0.950754</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0.560923</td>\n",
       "      <td>0.323252</td>\n",
       "      <td>-0.848240</td>\n",
       "      <td>-1.431280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>-0.792667</td>\n",
       "      <td>-1.451741</td>\n",
       "      <td>-1.371192</td>\n",
       "      <td>0.649933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1.945243</td>\n",
       "      <td>0.291460</td>\n",
       "      <td>0.987475</td>\n",
       "      <td>-1.255046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2.094016</td>\n",
       "      <td>-0.424357</td>\n",
       "      <td>0.772499</td>\n",
       "      <td>-1.539953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>-1.107322</td>\n",
       "      <td>0.794572</td>\n",
       "      <td>0.976629</td>\n",
       "      <td>1.204060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>0.310208</td>\n",
       "      <td>1.624616</td>\n",
       "      <td>-0.834678</td>\n",
       "      <td>-0.630555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>0.131424</td>\n",
       "      <td>-0.390378</td>\n",
       "      <td>-0.246806</td>\n",
       "      <td>0.284426</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.165107</td>\n",
       "      <td>0.230188</td>\n",
       "      <td>0.449587</td>\n",
       "      <td>-0.077723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>-0.962545</td>\n",
       "      <td>0.422311</td>\n",
       "      <td>-0.071149</td>\n",
       "      <td>0.632043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>0.381892</td>\n",
       "      <td>-0.479426</td>\n",
       "      <td>-0.170493</td>\n",
       "      <td>0.529948</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1.587720</td>\n",
       "      <td>-1.989404</td>\n",
       "      <td>-1.393013</td>\n",
       "      <td>1.153079</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>-0.248147</td>\n",
       "      <td>-1.721525</td>\n",
       "      <td>-0.576941</td>\n",
       "      <td>0.378812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>-1.032213</td>\n",
       "      <td>0.883628</td>\n",
       "      <td>-0.947101</td>\n",
       "      <td>-0.239395</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>0.673853</td>\n",
       "      <td>0.905111</td>\n",
       "      <td>0.765475</td>\n",
       "      <td>-0.173614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>-0.429051</td>\n",
       "      <td>0.081753</td>\n",
       "      <td>0.199183</td>\n",
       "      <td>-0.475000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>970</th>\n",
       "      <td>0.559770</td>\n",
       "      <td>0.201509</td>\n",
       "      <td>-0.268777</td>\n",
       "      <td>0.547237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>971</th>\n",
       "      <td>0.330805</td>\n",
       "      <td>2.412644</td>\n",
       "      <td>0.471228</td>\n",
       "      <td>-0.505078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>972</th>\n",
       "      <td>0.182293</td>\n",
       "      <td>1.977773</td>\n",
       "      <td>0.547748</td>\n",
       "      <td>0.604433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>973</th>\n",
       "      <td>-0.952285</td>\n",
       "      <td>1.169918</td>\n",
       "      <td>0.355875</td>\n",
       "      <td>0.674981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>974</th>\n",
       "      <td>0.261232</td>\n",
       "      <td>1.969686</td>\n",
       "      <td>0.000135</td>\n",
       "      <td>-0.112191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>975</th>\n",
       "      <td>-1.857906</td>\n",
       "      <td>1.254443</td>\n",
       "      <td>0.464049</td>\n",
       "      <td>-2.072417</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <td>-0.529595</td>\n",
       "      <td>-1.058427</td>\n",
       "      <td>0.866477</td>\n",
       "      <td>-0.344527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>1.317628</td>\n",
       "      <td>-0.441258</td>\n",
       "      <td>0.355024</td>\n",
       "      <td>-0.643639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>978</th>\n",
       "      <td>0.155463</td>\n",
       "      <td>0.321662</td>\n",
       "      <td>-0.110996</td>\n",
       "      <td>-1.059155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>979</th>\n",
       "      <td>2.556814</td>\n",
       "      <td>0.131430</td>\n",
       "      <td>0.535891</td>\n",
       "      <td>-0.838919</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980</th>\n",
       "      <td>-1.387140</td>\n",
       "      <td>1.205119</td>\n",
       "      <td>-1.237739</td>\n",
       "      <td>0.343781</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>981</th>\n",
       "      <td>-1.189689</td>\n",
       "      <td>1.845316</td>\n",
       "      <td>-0.305253</td>\n",
       "      <td>0.021126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>982</th>\n",
       "      <td>-0.162171</td>\n",
       "      <td>1.400911</td>\n",
       "      <td>1.092700</td>\n",
       "      <td>-0.926221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>983</th>\n",
       "      <td>-0.854489</td>\n",
       "      <td>0.251458</td>\n",
       "      <td>-0.775186</td>\n",
       "      <td>0.012359</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>984</th>\n",
       "      <td>0.609112</td>\n",
       "      <td>0.446108</td>\n",
       "      <td>-0.626479</td>\n",
       "      <td>1.051779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>-1.922156</td>\n",
       "      <td>0.286586</td>\n",
       "      <td>-0.359273</td>\n",
       "      <td>-0.355192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>0.553620</td>\n",
       "      <td>0.321969</td>\n",
       "      <td>0.129823</td>\n",
       "      <td>0.451698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>1.015369</td>\n",
       "      <td>-0.127509</td>\n",
       "      <td>-0.784002</td>\n",
       "      <td>0.664365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>988</th>\n",
       "      <td>0.350985</td>\n",
       "      <td>-1.263185</td>\n",
       "      <td>-0.801332</td>\n",
       "      <td>-1.367007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>989</th>\n",
       "      <td>-0.053244</td>\n",
       "      <td>1.262870</td>\n",
       "      <td>1.703726</td>\n",
       "      <td>-1.053201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>990</th>\n",
       "      <td>-0.562784</td>\n",
       "      <td>1.249824</td>\n",
       "      <td>-0.722703</td>\n",
       "      <td>-0.514111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>991</th>\n",
       "      <td>-0.148050</td>\n",
       "      <td>0.144428</td>\n",
       "      <td>-1.785967</td>\n",
       "      <td>-0.771632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>992</th>\n",
       "      <td>0.097118</td>\n",
       "      <td>0.470255</td>\n",
       "      <td>-0.529151</td>\n",
       "      <td>1.716357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>-0.577128</td>\n",
       "      <td>-0.389529</td>\n",
       "      <td>-0.275109</td>\n",
       "      <td>-1.384752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>994</th>\n",
       "      <td>-0.270728</td>\n",
       "      <td>1.053503</td>\n",
       "      <td>-1.217568</td>\n",
       "      <td>-1.602103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>-0.406392</td>\n",
       "      <td>-0.005321</td>\n",
       "      <td>0.098018</td>\n",
       "      <td>-1.669979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>-1.171745</td>\n",
       "      <td>-0.398678</td>\n",
       "      <td>-0.651044</td>\n",
       "      <td>1.746751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>1.241512</td>\n",
       "      <td>0.671510</td>\n",
       "      <td>1.968512</td>\n",
       "      <td>-1.656706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>0.011049</td>\n",
       "      <td>-0.725268</td>\n",
       "      <td>-0.320926</td>\n",
       "      <td>0.063564</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>-0.193465</td>\n",
       "      <td>-0.298635</td>\n",
       "      <td>0.577740</td>\n",
       "      <td>0.005821</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            0         1         2         3\n",
       "0   -1.762329  0.883034 -0.052997  0.041565\n",
       "1    0.430549 -1.183729  0.412986 -0.537272\n",
       "2   -0.791836 -1.243000  2.175150 -0.074091\n",
       "3   -0.460999  2.022232  0.130960  0.837618\n",
       "4   -1.573649  0.687350  1.369322 -0.526989\n",
       "5   -0.460106 -0.567063 -0.734919  1.091058\n",
       "6    1.189055 -0.629686  0.222738 -0.146839\n",
       "7   -1.866358  0.045562 -1.481863  1.402034\n",
       "8   -0.743142 -1.016015  1.504146  1.061250\n",
       "9   -1.139457 -0.723509 -0.173658  0.101536\n",
       "10  -0.451404 -0.079858  0.592246  0.175387\n",
       "11  -0.265265 -0.268328 -0.112837 -0.037180\n",
       "12   0.294101  0.325259 -0.027550  0.648257\n",
       "13  -0.729797 -0.005303  1.482832  0.159528\n",
       "14  -0.007283  1.095223 -0.451293 -0.950754\n",
       "15   0.560923  0.323252 -0.848240 -1.431280\n",
       "16  -0.792667 -1.451741 -1.371192  0.649933\n",
       "17   1.945243  0.291460  0.987475 -1.255046\n",
       "18   2.094016 -0.424357  0.772499 -1.539953\n",
       "19  -1.107322  0.794572  0.976629  1.204060\n",
       "20   0.310208  1.624616 -0.834678 -0.630555\n",
       "21   0.131424 -0.390378 -0.246806  0.284426\n",
       "22   1.165107  0.230188  0.449587 -0.077723\n",
       "23  -0.962545  0.422311 -0.071149  0.632043\n",
       "24   0.381892 -0.479426 -0.170493  0.529948\n",
       "25   1.587720 -1.989404 -1.393013  1.153079\n",
       "26  -0.248147 -1.721525 -0.576941  0.378812\n",
       "27  -1.032213  0.883628 -0.947101 -0.239395\n",
       "28   0.673853  0.905111  0.765475 -0.173614\n",
       "29  -0.429051  0.081753  0.199183 -0.475000\n",
       "..        ...       ...       ...       ...\n",
       "970  0.559770  0.201509 -0.268777  0.547237\n",
       "971  0.330805  2.412644  0.471228 -0.505078\n",
       "972  0.182293  1.977773  0.547748  0.604433\n",
       "973 -0.952285  1.169918  0.355875  0.674981\n",
       "974  0.261232  1.969686  0.000135 -0.112191\n",
       "975 -1.857906  1.254443  0.464049 -2.072417\n",
       "976 -0.529595 -1.058427  0.866477 -0.344527\n",
       "977  1.317628 -0.441258  0.355024 -0.643639\n",
       "978  0.155463  0.321662 -0.110996 -1.059155\n",
       "979  2.556814  0.131430  0.535891 -0.838919\n",
       "980 -1.387140  1.205119 -1.237739  0.343781\n",
       "981 -1.189689  1.845316 -0.305253  0.021126\n",
       "982 -0.162171  1.400911  1.092700 -0.926221\n",
       "983 -0.854489  0.251458 -0.775186  0.012359\n",
       "984  0.609112  0.446108 -0.626479  1.051779\n",
       "985 -1.922156  0.286586 -0.359273 -0.355192\n",
       "986  0.553620  0.321969  0.129823  0.451698\n",
       "987  1.015369 -0.127509 -0.784002  0.664365\n",
       "988  0.350985 -1.263185 -0.801332 -1.367007\n",
       "989 -0.053244  1.262870  1.703726 -1.053201\n",
       "990 -0.562784  1.249824 -0.722703 -0.514111\n",
       "991 -0.148050  0.144428 -1.785967 -0.771632\n",
       "992  0.097118  0.470255 -0.529151  1.716357\n",
       "993 -0.577128 -0.389529 -0.275109 -1.384752\n",
       "994 -0.270728  1.053503 -1.217568 -1.602103\n",
       "995 -0.406392 -0.005321  0.098018 -1.669979\n",
       "996 -1.171745 -0.398678 -0.651044  1.746751\n",
       "997  1.241512  0.671510  1.968512 -1.656706\n",
       "998  0.011049 -0.725268 -0.320926  0.063564\n",
       "999 -0.193465 -0.298635  0.577740  0.005821\n",
       "\n",
       "[1000 rows x 4 columns]"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 17.你想要找出第2列中绝对值大小超过3的值：\n",
    "data = pd.DataFrame(np.random.randn(1000, 4))\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>0.805693</td>\n",
       "      <td>-1.591710</td>\n",
       "      <td>3.223374</td>\n",
       "      <td>0.715709</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>639</th>\n",
       "      <td>-2.188559</td>\n",
       "      <td>-1.425127</td>\n",
       "      <td>3.224256</td>\n",
       "      <td>-2.162924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>700</th>\n",
       "      <td>1.502301</td>\n",
       "      <td>0.713317</td>\n",
       "      <td>3.417889</td>\n",
       "      <td>0.974149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>834</th>\n",
       "      <td>0.725027</td>\n",
       "      <td>-0.924953</td>\n",
       "      <td>3.125129</td>\n",
       "      <td>-0.382069</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0         1         2         3\n",
       "88   0.805693 -1.591710  3.223374  0.715709\n",
       "639 -2.188559 -1.425127  3.224256 -2.162924\n",
       "700  1.502301  0.713317  3.417889  0.974149\n",
       "834  0.725027 -0.924953  3.125129 -0.382069"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 第二列中绝对值大小超过3的数据\n",
    "data[np.abs(data[2])>3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>-0.863569</td>\n",
       "      <td>-0.423536</td>\n",
       "      <td>0.801542</td>\n",
       "      <td>-3.063985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>-3.211654</td>\n",
       "      <td>0.623704</td>\n",
       "      <td>-1.105608</td>\n",
       "      <td>-0.237472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>1.907201</td>\n",
       "      <td>2.221003</td>\n",
       "      <td>-0.040448</td>\n",
       "      <td>-3.407119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>2.750373</td>\n",
       "      <td>0.728336</td>\n",
       "      <td>-0.965948</td>\n",
       "      <td>3.719819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>219</th>\n",
       "      <td>3.013372</td>\n",
       "      <td>-0.052631</td>\n",
       "      <td>-0.750334</td>\n",
       "      <td>0.828112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>248</th>\n",
       "      <td>0.228882</td>\n",
       "      <td>0.509118</td>\n",
       "      <td>-0.231244</td>\n",
       "      <td>3.041813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>278</th>\n",
       "      <td>3.203193</td>\n",
       "      <td>0.426526</td>\n",
       "      <td>-1.616514</td>\n",
       "      <td>0.180496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>319</th>\n",
       "      <td>3.101487</td>\n",
       "      <td>0.461149</td>\n",
       "      <td>0.705578</td>\n",
       "      <td>-0.353792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>407</th>\n",
       "      <td>-3.779209</td>\n",
       "      <td>0.594318</td>\n",
       "      <td>0.551821</td>\n",
       "      <td>-0.256855</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>410</th>\n",
       "      <td>-1.108610</td>\n",
       "      <td>1.408329</td>\n",
       "      <td>-3.267989</td>\n",
       "      <td>-1.235609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>449</th>\n",
       "      <td>2.007264</td>\n",
       "      <td>0.321691</td>\n",
       "      <td>3.037181</td>\n",
       "      <td>-1.289358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>506</th>\n",
       "      <td>3.436243</td>\n",
       "      <td>-1.412631</td>\n",
       "      <td>-0.727978</td>\n",
       "      <td>-0.380092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>535</th>\n",
       "      <td>-0.910244</td>\n",
       "      <td>-1.057303</td>\n",
       "      <td>-3.028637</td>\n",
       "      <td>2.629217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>587</th>\n",
       "      <td>0.573386</td>\n",
       "      <td>-0.482434</td>\n",
       "      <td>-0.971487</td>\n",
       "      <td>-3.256600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>806</th>\n",
       "      <td>1.603967</td>\n",
       "      <td>-1.277486</td>\n",
       "      <td>3.255579</td>\n",
       "      <td>-0.177949</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>961</th>\n",
       "      <td>3.726684</td>\n",
       "      <td>1.026143</td>\n",
       "      <td>1.178645</td>\n",
       "      <td>-0.900671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>976</th>\n",
       "      <td>-0.791011</td>\n",
       "      <td>-3.531818</td>\n",
       "      <td>0.051665</td>\n",
       "      <td>-2.201434</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0         1         2         3\n",
       "17  -0.863569 -0.423536  0.801542 -3.063985\n",
       "21  -3.211654  0.623704 -1.105608 -0.237472\n",
       "88   1.907201  2.221003 -0.040448 -3.407119\n",
       "162  2.750373  0.728336 -0.965948  3.719819\n",
       "219  3.013372 -0.052631 -0.750334  0.828112\n",
       "248  0.228882  0.509118 -0.231244  3.041813\n",
       "278  3.203193  0.426526 -1.616514  0.180496\n",
       "319  3.101487  0.461149  0.705578 -0.353792\n",
       "407 -3.779209  0.594318  0.551821 -0.256855\n",
       "410 -1.108610  1.408329 -3.267989 -1.235609\n",
       "449  2.007264  0.321691  3.037181 -1.289358\n",
       "506  3.436243 -1.412631 -0.727978 -0.380092\n",
       "535 -0.910244 -1.057303 -3.028637  2.629217\n",
       "587  0.573386 -0.482434 -0.971487 -3.256600\n",
       "806  1.603967 -1.277486  3.255579 -0.177949\n",
       "961  3.726684  1.026143  1.178645 -0.900671\n",
       "976 -0.791011 -3.531818  0.051665 -2.201434"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 18.要选出全部含有“绝对值大于3”的行.(提示:any方法)\n",
    "data = pd.DataFrame(np.random.randn(1000, 4))\n",
    "data[(np.abs(data)>3).any(axis=1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Dave': 'dave@google.com',\n",
       " 'Steve': 'steve@gmail.com',\n",
       " 'Rob': 'rob@gmail.com',\n",
       " 'Wes': nan}"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 19.检测各电子邮件是否包含”gmail”\n",
    "data = {'Dave': 'dave@google.com', 'Steve': 'steve@gmail.com',\n",
    "'Rob': 'rob@gmail.com', 'Wes': np.nan}\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dave     dave@google.com\n",
       "Steve    steve@gmail.com\n",
       "Rob        rob@gmail.com\n",
       "Wes                  NaN\n",
       "dtype: object"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将字典转换成Series对象\n",
    "data=pd.Series(data)\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dave      True\n",
       "Steve     True\n",
       "Rob       True\n",
       "Wes      False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#返回内容是否不空\n",
    "data.notnull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dave     False\n",
       "Steve     True\n",
       "Rob       True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 返回不空数据中是否包含gmail\n",
    "data[data.notnull()].str.contains(\"gmail\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dave     False\n",
       "Steve     True\n",
       "Rob       True\n",
       "Wes        NaN\n",
       "dtype: object"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#返回所有数据中是否包含gmail\n",
    "data.str.contains(\"gmail\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>data1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>c</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>a</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>b</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key  data1\n",
       "0   b      0\n",
       "1   b      1\n",
       "2   a      2\n",
       "3   c      3\n",
       "4   a      4\n",
       "5   a      5\n",
       "6   b      6"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>d</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key  data2\n",
       "0   a      0\n",
       "1   b      1\n",
       "2   d      2"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 20.两个数据源取交集\n",
    "df1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],'data1': range(7)})\n",
    "df2 = pd.DataFrame({'key': ['a', 'b', 'd'], 'data2': range(3)})\n",
    "display(df1,df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>b</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>a</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key  data1  data2\n",
       "0   b      0      1\n",
       "1   b      1      1\n",
       "2   b      6      1\n",
       "3   a      2      0\n",
       "4   a      4      0\n",
       "5   a      5      0"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 公共字段相等进行组合\n",
    "pd.merge(df1,df2,on=\"key\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.857800</td>\n",
       "      <td>-1.656852</td>\n",
       "      <td>-0.519872</td>\n",
       "      <td>-0.750969</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.019854</td>\n",
       "      <td>0.788684</td>\n",
       "      <td>-1.809611</td>\n",
       "      <td>-0.177360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.242111</td>\n",
       "      <td>-0.130182</td>\n",
       "      <td>0.144902</td>\n",
       "      <td>0.202721</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b         c         d\n",
       "0  1.857800 -1.656852 -0.519872 -0.750969\n",
       "1  0.019854  0.788684 -1.809611 -0.177360\n",
       "2  1.242111 -0.130182  0.144902  0.202721"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.195546</td>\n",
       "      <td>0.825777</td>\n",
       "      <td>0.601532</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.776961</td>\n",
       "      <td>1.107528</td>\n",
       "      <td>-1.077237</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b         d         a\n",
       "0 -0.195546  0.825777  0.601532\n",
       "1  0.776961  1.107528 -1.077237"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 21.连接两个数据源\n",
    "df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])\n",
    "df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])\n",
    "display(df1,df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.857800</td>\n",
       "      <td>-1.656852</td>\n",
       "      <td>-0.519872</td>\n",
       "      <td>-0.750969</td>\n",
       "      <td>-0.195546</td>\n",
       "      <td>0.825777</td>\n",
       "      <td>0.601532</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.019854</td>\n",
       "      <td>0.788684</td>\n",
       "      <td>-1.809611</td>\n",
       "      <td>-0.177360</td>\n",
       "      <td>0.776961</td>\n",
       "      <td>1.107528</td>\n",
       "      <td>-1.077237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.242111</td>\n",
       "      <td>-0.130182</td>\n",
       "      <td>0.144902</td>\n",
       "      <td>0.202721</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b         c         d         b         d         a\n",
       "0  1.857800 -1.656852 -0.519872 -0.750969 -0.195546  0.825777  0.601532\n",
       "1  0.019854  0.788684 -1.809611 -0.177360  0.776961  1.107528 -1.077237\n",
       "2  1.242111 -0.130182  0.144902  0.202721       NaN       NaN       NaN"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将df2拼接到df1的右边\n",
    "pd.concat([df1,df2],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     a    b   c\n",
       "0  1.0  NaN   2\n",
       "1  NaN  2.0   6\n",
       "2  5.0  NaN  10\n",
       "3  NaN  6.0  14"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     a    b\n",
       "0  5.0  NaN\n",
       "1  4.0  3.0\n",
       "2  NaN  4.0\n",
       "3  3.0  6.0\n",
       "4  7.0  8.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#22.组合两个数据源(尽量补全NaN数据)\n",
    "df1 = pd.DataFrame({'a': [1., np.nan, 5., np.nan],'b': [np.nan, 2., np.nan, 6.],\n",
    "'c': range(2, 18, 4)})\n",
    "df2 = pd.DataFrame({'a': [5., 4., np.nan, 3., 7.],'b': [np.nan, 3., 4., 6., 8.]})\n",
    "display(df1,df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     a    b     c\n",
       "0  1.0  NaN   2.0\n",
       "1  4.0  2.0   6.0\n",
       "2  5.0  4.0  10.0\n",
       "3  3.0  6.0  14.0\n",
       "4  7.0  8.0   NaN"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#使用df2填充df1中的NaN数据\n",
    "df1.combine_first(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "      <td>-0.224870</td>\n",
       "      <td>-0.928069</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "      <td>0.136243</td>\n",
       "      <td>-1.038779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "      <td>-1.203413</td>\n",
       "      <td>0.802964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "      <td>-1.369542</td>\n",
       "      <td>-0.878547</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "      <td>-0.578134</td>\n",
       "      <td>0.658734</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2     data1     data2\n",
       "0    a  one -0.224870 -0.928069\n",
       "1    a  two  0.136243 -1.038779\n",
       "2    b  one -1.203413  0.802964\n",
       "3    b  two -1.369542 -0.878547\n",
       "4    a  one -0.578134  0.658734"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#23.将按照data1列按照key1列进行分组,并求每组的平均值\n",
    "df = pd.DataFrame({\n",
    "'key1' : ['a', 'a', 'b', 'b', 'a'], \n",
    "'key2' : ['one', 'two', 'one', 'two', 'one'],\n",
    "'data1' : np.random.randn(5),\n",
    "'data2' : np.random.randn(5)})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x0000004C4079CDA0>"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped = df.groupby('key1')\n",
    "grouped"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1\n",
       "a   -0.222254\n",
       "b   -1.286478\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#求平均值\n",
    "grouped.mean()[\"data1\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>-0.222254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.286478</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1\n",
       "key1          \n",
       "a    -0.222254\n",
       "b    -1.286478"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按照key1列进行分组，统计data1列的平均值\n",
    "df.pivot_table(index='key1',values='data1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>-0.222254</td>\n",
       "      <td>-0.436038</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.286478</td>\n",
       "      <td>-0.037792</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1     data2\n",
       "key1                    \n",
       "a    -0.222254 -0.436038\n",
       "b    -1.286478 -0.037792"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按照key1列进行分组，统计data1列和data2列的平均值,aggfunc=np.mean是默认值，可以不写\n",
    "df.pivot_table(index='key1',values=['data1','data2'],aggfunc=np.mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "New York    8    9     10    11"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#24.求one列的最大值\n",
    "data = pd.DataFrame(np.arange(12).reshape((3, 4)),index=['Ohio', 'Colorado', 'New York'],columns=['one', 'two', 'three', 'four'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[\"one\"].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "New York    8    9     10    11"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 25.新增five列,值为one列和two列的和\n",
    "data = pd.DataFrame(np.arange(12).reshape((3, 4)),index=['Ohio', 'Colorado', 'New York'],columns=['one', 'two', 'three', 'four'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio         1\n",
       "Colorado     9\n",
       "New York    17\n",
       "Name: five, dtype: int32"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['five']=data['one']+data['two']\n",
    "data['five']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
